{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 用户和活动关联关系处理\n",
    "\n",
    "\n",
    "整个数据集中的活动数目（events.csv）太多，因此下面只处理在训练集和测试集中出现过的用户和活动集合，并对它们重新编制索引。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "#保存数据\n",
    "import cPickle\n",
    "\n",
    "import itertools\n",
    "\n",
    "#处理事件字符串\n",
    "import datetime\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import scipy.io as sio\n",
    "import scipy.sparse as ss\n",
    "\n",
    "#相似度/距离\n",
    "import scipy.spatial.distance as ssd\n",
    "\n",
    "from collections import defaultdict\n",
    "from sklearn.preprocessing import normalize"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "number of uniqueUsers :3391\n",
      "number of uniqueEvents :13418\n"
     ]
    }
   ],
   "source": [
    "# Build lookup tables for the users and events in train.csv / test.csv.\n",
    "#\n",
    "# train.csv has 6 columns:\n",
    "#   user            user ID\n",
    "#   event           event ID\n",
    "#   invited         whether the user was invited (0/1)\n",
    "#   timestamp       ISO-8601 UTC time string: when the user saw the event\n",
    "#   interested, not_interested\n",
    "#\n",
    "# test.csv has the same columns except interested / not_interested.\n",
    "# (This header is a comment rather than an indented string literal so the\n",
    "# cell is also valid when run as a plain Python script.)\n",
    "\n",
    "\n",
    "def _save_pickle(obj, path):\n",
    "    \"\"\"Pickle obj to path, closing the file even if dumping fails.\"\"\"\n",
    "    with open(path, 'wb') as fout:\n",
    "        cPickle.dump(obj, fout)\n",
    "\n",
    "\n",
    "# Distinct raw user IDs and event IDs seen in train.csv or test.csv.\n",
    "uniqueUsers = set()\n",
    "uniqueEvents = set()\n",
    "\n",
    "# Inverted indexes keyed by the re-indexed integer ids:\n",
    "# events recorded for each user / users recorded for each event.\n",
    "# Both are filled from train.csv only (see the loop further down).\n",
    "eventsForUser = defaultdict(set)\n",
    "usersForEvent = defaultdict(set)\n",
    "\n",
    "for filename in [\"train.csv\", \"test.csv\"]:\n",
    "    # Text mode: split(\",\") then operates on str on Python 3 as well as 2.\n",
    "    with open(filename, 'r') as f:\n",
    "        f.readline()  # skip the header row (column names)\n",
    "        for line in f:\n",
    "            cols = line.strip().split(\",\")\n",
    "            uniqueUsers.add(cols[0])    # column 0: user ID\n",
    "            uniqueEvents.add(cols[1])   # column 1: event ID\n",
    "\n",
    "n_uniqueUsers = len(uniqueUsers)\n",
    "n_uniqueEvents = len(uniqueEvents)\n",
    "\n",
    "print(\"number of uniqueUsers :%d\" % n_uniqueUsers)\n",
    "print(\"number of uniqueEvents :%d\" % n_uniqueEvents)\n",
    "\n",
    "# Sparse user x event matrix recording the interested flag; intended as\n",
    "# input for later LFM / SVD++ processing.\n",
    "userEventScores = ss.dok_matrix((n_uniqueUsers, n_uniqueEvents))\n",
    "\n",
    "# Map raw ID strings to dense 0-based row / column indexes.\n",
    "# The numbering follows set iteration order, so it is only meaningful\n",
    "# together with the index tables saved below.\n",
    "userIndex = {u: i for i, u in enumerate(uniqueUsers)}\n",
    "eventIndex = {e: i for i, e in enumerate(uniqueEvents)}\n",
    "\n",
    "# Not used in this cell; kept in case a later cell references it.\n",
    "n_records = 0\n",
    "\n",
    "with open(\"train.csv\", 'r') as ftrain:\n",
    "    ftrain.readline()  # skip the header row\n",
    "    for line in ftrain:\n",
    "        cols = line.strip().split(\",\")\n",
    "        i = userIndex[cols[0]]   # user row\n",
    "        j = eventIndex[cols[1]]  # event column\n",
    "\n",
    "        eventsForUser[i].add(j)\n",
    "        usersForEvent[j].add(i)\n",
    "\n",
    "        # Only the interested flag (column 4) is stored. In the sparse\n",
    "        # matrix a 0 is the same as a missing entry, so interested == 0\n",
    "        # cannot be told apart from \"no record\".\n",
    "        score = int(cols[4])\n",
    "        userEventScores[i, j] = score\n",
    "\n",
    "# Events per user: later used to propagate friends' events to a user.\n",
    "_save_pickle(eventsForUser, \"PE_eventsForUser.pkl\")\n",
    "# Users per event.\n",
    "_save_pickle(usersForEvent, \"PE_usersForEvent.pkl\")\n",
    "\n",
    "# User-event relation matrix R, saved in Matrix Market format.\n",
    "sio.mmwrite(\"PE_userEventScores\", userEventScores)\n",
    "\n",
    "# Index tables: raw user ID -> row, raw event ID -> column.\n",
    "_save_pickle(userIndex, \"PE_userIndex.pkl\")\n",
    "_save_pickle(eventIndex, \"PE_eventIndex.pkl\")\n",
    "\n",
    "# To avoid unnecessary similarity computations later, collect only the\n",
    "# associated pairs: user pairs that share at least one event, and event\n",
    "# pairs that share at least one user.\n",
    "uniqueUserPairs = set()\n",
    "uniqueEventPairs = set()\n",
    "\n",
    "for event in uniqueEvents:\n",
    "    i = eventIndex[event]\n",
    "    users = usersForEvent[i]\n",
    "    # Two users already form one pair, so the threshold is >= 2\n",
    "    # (the previous > 2 silently dropped events with exactly two users).\n",
    "    if len(users) >= 2:\n",
    "        uniqueUserPairs.update(itertools.combinations(users, 2))\n",
    "\n",
    "for user in uniqueUsers:\n",
    "    u = userIndex[user]\n",
    "    events = eventsForUser[u]\n",
    "    # Same threshold reasoning as for the user pairs above.\n",
    "    if len(events) >= 2:\n",
    "        uniqueEventPairs.update(itertools.combinations(events, 2))\n",
    "\n",
    "# Save the associated user pairs and event pairs.\n",
    "# NOTE(review): the \"FE_\" prefix differs from the \"PE_\" prefix of every\n",
    "# other artifact written here; left unchanged because the readers of this\n",
    "# file are not visible from this cell -- confirm before renaming.\n",
    "_save_pickle(uniqueUserPairs, \"FE_uniqueUserPairs.pkl\")\n",
    "_save_pickle(uniqueEventPairs, \"PE_uniqueEventPairs.pkl\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# The numbers of users and events that appear in the train and test sets are far smaller than the numbers in users.csv and events.csv"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 以下为我增添的代码"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reload the event index table (raw event ID -> column index) saved earlier in this notebook.\n",
    "# The with-block closes the file handle, which the bare open() call left open.\n",
    "with open(\"PE_eventIndex.pkl\", 'rb') as fin:\n",
    "    data = cPickle.load(fin)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'433929966': 0,\n",
       " '4234683959': 1,\n",
       " '679447655': 2,\n",
       " '773396729': 3,\n",
       " '3822707736': 4,\n",
       " '1588485860': 5,\n",
       " '1465884852': 6,\n",
       " '1968232351': 7,\n",
       " '2051799233': 9,\n",
       " '447444721': 10,\n",
       " '2301723344': 11,\n",
       " '4028072926': 12,\n",
       " '1935879458': 13,\n",
       " '3109164411': 14,\n",
       " '833858713': 15,\n",
       " '186124210': 16,\n",
       " '1180225343': 17,\n",
       " '4016804175': 18,\n",
       " '2726295050': 19,\n",
       " '4266442206': 20,\n",
       " '2313587840': 21,\n",
       " '733261478': 1459,\n",
       " '1458906695': 23,\n",
       " '2450977738': 24,\n",
       " '915187958': 25,\n",
       " '407288007': 26,\n",
       " '3768548263': 27,\n",
       " '3051088984': 28,\n",
       " '1933276938': 29,\n",
       " '1770070953': 30,\n",
       " '3965935592': 31,\n",
       " '2812330223': 32,\n",
       " '3323980704': 33,\n",
       " '4049041156': 34,\n",
       " '3696272381': 35,\n",
       " '1281252091': 36,\n",
       " '1319656469': 37,\n",
       " '1872379613': 38,\n",
       " '914775174': 39,\n",
       " '230117918': 41,\n",
       " '127163854': 42,\n",
       " '1929917726': 43,\n",
       " '1343073449': 9192,\n",
       " '2527738096': 44,\n",
       " '417123290': 45,\n",
       " '1499530182': 46,\n",
       " '332142007': 47,\n",
       " '4126236405': 48,\n",
       " '570094895': 11877,\n",
       " '1880617811': 49,\n",
       " '106978589': 50,\n",
       " '2850509209': 51,\n",
       " '2507275690': 52,\n",
       " '3770866498': 53,\n",
       " '3349548561': 54,\n",
       " '4096532695': 55,\n",
       " '1286933492': 6574,\n",
       " '1607711017': 56,\n",
       " '2043006756': 57,\n",
       " '2606383328': 58,\n",
       " '1977406305': 59,\n",
       " '3172060382': 60,\n",
       " '2646392315': 61,\n",
       " '2912281746': 62,\n",
       " '1907550340': 63,\n",
       " '774093053': 64,\n",
       " '3327557579': 65,\n",
       " '3802650890': 66,\n",
       " '2055922065': 67,\n",
       " '3711909804': 68,\n",
       " '393394477': 69,\n",
       " '2896451394': 70,\n",
       " '4201550847': 71,\n",
       " '2474982115': 72,\n",
       " '3805108320': 73,\n",
       " '650745497': 74,\n",
       " '1291141020': 434,\n",
       " '973265540': 76,\n",
       " '806826749': 77,\n",
       " '2840088969': 78,\n",
       " '3700950547': 79,\n",
       " '1000481836': 80,\n",
       " '1959421009': 81,\n",
       " '3217192264': 82,\n",
       " '894829625': 83,\n",
       " '4293596113': 84,\n",
       " '3446267401': 86,\n",
       " '2415873572': 88,\n",
       " '169266244': 89,\n",
       " '2422402842': 90,\n",
       " '3523548061': 91,\n",
       " '3946739090': 92,\n",
       " '256821264': 93,\n",
       " '1310511988': 94,\n",
       " '1280269047': 5536,\n",
       " '345960905': 96,\n",
       " '1349844435': 97,\n",
       " '265740872': 99,\n",
       " '1880156476': 100,\n",
       " '3179167671': 101,\n",
       " '2818648337': 102,\n",
       " '1771812765': 103,\n",
       " '1889293012': 8943,\n",
       " '4241501571': 105,\n",
       " '1569316024': 106,\n",
       " '3354141479': 108,\n",
       " '682667459': 109,\n",
       " '4261753049': 110,\n",
       " '2078647608': 111,\n",
       " '321356225': 112,\n",
       " '1512828747': 113,\n",
       " '598708806': 114,\n",
       " '3161734222': 115,\n",
       " '4224194427': 11610,\n",
       " '915963444': 117,\n",
       " '2877011901': 118,\n",
       " '1073720066': 119,\n",
       " '2188481598': 120,\n",
       " '3197423454': 121,\n",
       " '3976610702': 122,\n",
       " '439311946': 123,\n",
       " '2723158684': 124,\n",
       " '2221840547': 125,\n",
       " '2847152365': 126,\n",
       " '2024105424': 127,\n",
       " '3312755383': 129,\n",
       " '4073832558': 130,\n",
       " '1963825952': 281,\n",
       " '4227274972': 132,\n",
       " '228340462': 133,\n",
       " '2562787688': 7963,\n",
       " '2005419859': 11922,\n",
       " '2075203466': 136,\n",
       " '533036818': 137,\n",
       " '861989233': 831,\n",
       " '1722911749': 139,\n",
       " '2166119628': 140,\n",
       " '2186351621': 141,\n",
       " '3273736995': 142,\n",
       " '4083994402': 143,\n",
       " '272875209': 144,\n",
       " '1868735086': 145,\n",
       " '1203574711': 146,\n",
       " '3571352784': 147,\n",
       " '2462996142': 148,\n",
       " '1158452780': 149,\n",
       " '1043841273': 150,\n",
       " '740148951': 151,\n",
       " '2456710844': 152,\n",
       " '234487772': 153,\n",
       " '3038741300': 154,\n",
       " '1177314523': 155,\n",
       " '83427780': 156,\n",
       " '1764893644': 157,\n",
       " '3812153136': 158,\n",
       " '1736406045': 159,\n",
       " '2280892735': 161,\n",
       " '376720958': 162,\n",
       " '1860786983': 164,\n",
       " '3413988347': 165,\n",
       " '2157645470': 3424,\n",
       " '8559115': 167,\n",
       " '1438024794': 168,\n",
       " '3507025494': 169,\n",
       " '1467809542': 170,\n",
       " '3759330615': 171,\n",
       " '3819567837': 172,\n",
       " '2752772467': 173,\n",
       " '3841578300': 175,\n",
       " '317875697': 176,\n",
       " '3523716576': 177,\n",
       " '981244440': 178,\n",
       " '3075319534': 179,\n",
       " '38825896': 180,\n",
       " '2103025461': 181,\n",
       " '1232039292': 182,\n",
       " '4031268561': 183,\n",
       " '2464636842': 184,\n",
       " '2512346948': 185,\n",
       " '1203717384': 186,\n",
       " '3697638490': 87,\n",
       " '390684326': 188,\n",
       " '1827283730': 189,\n",
       " '3594729381': 190,\n",
       " '2042697619': 191,\n",
       " '2962515012': 7061,\n",
       " '3949816728': 193,\n",
       " '4043937476': 194,\n",
       " '3037717443': 195,\n",
       " '2877314442': 196,\n",
       " '866923839': 197,\n",
       " '1061745506': 198,\n",
       " '655495803': 199,\n",
       " '486447403': 200,\n",
       " '1173365630': 201,\n",
       " '2626284779': 202,\n",
       " '2020531758': 1235,\n",
       " '3884256656': 204,\n",
       " '244220291': 205,\n",
       " '3057888073': 206,\n",
       " '3729283847': 207,\n",
       " '1430932461': 208,\n",
       " '3402209713': 209,\n",
       " '675888033': 210,\n",
       " '1915380946': 211,\n",
       " '3534091101': 212,\n",
       " '1824334392': 213,\n",
       " '2284893483': 11082,\n",
       " '3329566428': 215,\n",
       " '2762282302': 216,\n",
       " '2118802492': 217,\n",
       " '3549888444': 218,\n",
       " '1549888368': 219,\n",
       " '3327259603': 1631,\n",
       " '2598678508': 221,\n",
       " '3926022702': 222,\n",
       " '1633704318': 223,\n",
       " '3889211600': 225,\n",
       " '978829373': 226,\n",
       " '1374330612': 227,\n",
       " '3996685669': 228,\n",
       " '3400642235': 229,\n",
       " '2009824063': 8673,\n",
       " '2696649330': 1468,\n",
       " '1590675258': 231,\n",
       " '2700038507': 232,\n",
       " '984516743': 233,\n",
       " '1758417603': 234,\n",
       " '2235428011': 235,\n",
       " '1282404404': 236,\n",
       " '975577539': 239,\n",
       " '613668540': 240,\n",
       " '138523229': 241,\n",
       " '1759182938': 1571,\n",
       " '2560867051': 243,\n",
       " '4219250662': 244,\n",
       " '580186719': 11176,\n",
       " '1873976153': 246,\n",
       " '2867772846': 247,\n",
       " '2363192851': 248,\n",
       " '1861025224': 13272,\n",
       " '709512451': 251,\n",
       " '251657645': 252,\n",
       " '1298016856': 253,\n",
       " '3412307751': 254,\n",
       " '3352558065': 255,\n",
       " '513780850': 256,\n",
       " '2472934939': 257,\n",
       " '1462428678': 258,\n",
       " '2859386475': 259,\n",
       " '811652961': 260,\n",
       " '1429589754': 261,\n",
       " '1427394700': 262,\n",
       " '1417587050': 263,\n",
       " '750506418': 264,\n",
       " '1537177508': 265,\n",
       " '1810281079': 266,\n",
       " '3167757146': 267,\n",
       " '393416610': 268,\n",
       " '3069899537': 269,\n",
       " '536949855': 271,\n",
       " '2106861684': 272,\n",
       " '3729155908': 273,\n",
       " '1111361846': 274,\n",
       " '3267347122': 275,\n",
       " '371746452': 276,\n",
       " '1888695830': 277,\n",
       " '967501371': 6713,\n",
       " '1738181791': 279,\n",
       " '3633351483': 4280,\n",
       " '354333473': 282,\n",
       " '647866667': 283,\n",
       " '511362023': 284,\n",
       " '505464566': 285,\n",
       " '1255932343': 286,\n",
       " '1060440359': 2262,\n",
       " '1739388067': 288,\n",
       " '2232544430': 289,\n",
       " '3041724882': 290,\n",
       " '1985321109': 291,\n",
       " '1471308455': 292,\n",
       " '2272129750': 293,\n",
       " '4010042708': 294,\n",
       " '2134876228': 295,\n",
       " '1993199919': 296,\n",
       " '2832466526': 297,\n",
       " '3703433044': 298,\n",
       " '2961046023': 299,\n",
       " '435473446': 300,\n",
       " '3773409641': 302,\n",
       " '2913860360': 303,\n",
       " '55735397': 304,\n",
       " '2422072300': 305,\n",
       " '2458143482': 306,\n",
       " '1913672917': 307,\n",
       " '2156430059': 308,\n",
       " '4055585686': 309,\n",
       " '3667110496': 310,\n",
       " '2691509364': 311,\n",
       " '100417525': 312,\n",
       " '1139116596': 313,\n",
       " '377254812': 314,\n",
       " '3801433102': 315,\n",
       " '2157401626': 316,\n",
       " '1711604318': 317,\n",
       " '805963128': 318,\n",
       " '3619266504': 319,\n",
       " '1988291613': 320,\n",
       " '1381535648': 321,\n",
       " '2660205855': 322,\n",
       " '268203907': 323,\n",
       " '3404489024': 324,\n",
       " '263279656': 325,\n",
       " '3673014182': 326,\n",
       " '4244406355': 327,\n",
       " '25570068': 2101,\n",
       " '490590226': 329,\n",
       " '1828726073': 330,\n",
       " '2408267303': 331,\n",
       " '3319862596': 332,\n",
       " '171025395': 333,\n",
       " '677713566': 334,\n",
       " '61894853': 335,\n",
       " '1712448138': 336,\n",
       " '2407176743': 337,\n",
       " '3657419629': 338,\n",
       " '2007442218': 339,\n",
       " '1063154747': 340,\n",
       " '3630913433': 11274,\n",
       " '1366554501': 341,\n",
       " '3574237758': 342,\n",
       " '1295729468': 343,\n",
       " '3851706723': 344,\n",
       " '1988027954': 345,\n",
       " '3036332464': 346,\n",
       " '3365983510': 347,\n",
       " '2333818729': 2227,\n",
       " '582672945': 349,\n",
       " '3696948957': 350,\n",
       " '1294755971': 351,\n",
       " '4025878315': 2246,\n",
       " '232415600': 353,\n",
       " '1592001933': 354,\n",
       " '1656987460': 355,\n",
       " '1781009055': 356,\n",
       " '2670583521': 357,\n",
       " '4116595591': 358,\n",
       " '748898602': 9101,\n",
       " '2147374657': 7483,\n",
       " '4277799248': 2296,\n",
       " '844299203': 360,\n",
       " '135744766': 361,\n",
       " '615488222': 362,\n",
       " '4242816413': 363,\n",
       " '650456731': 364,\n",
       " '2851625619': 365,\n",
       " '322679569': 366,\n",
       " '2653321720': 367,\n",
       " '4092689670': 368,\n",
       " '274265388': 369,\n",
       " '1309574084': 370,\n",
       " '1218003893': 371,\n",
       " '3935738455': 372,\n",
       " '2796594102': 373,\n",
       " '495818697': 374,\n",
       " '407061424': 375,\n",
       " '3892571955': 376,\n",
       " '1930033127': 377,\n",
       " '526306322': 378,\n",
       " '794450376': 379,\n",
       " '3144013743': 380,\n",
       " '2448320909': 381,\n",
       " '1695480317': 382,\n",
       " '3407311067': 383,\n",
       " '122289878': 384,\n",
       " '2526925550': 270,\n",
       " '2750873665': 386,\n",
       " '3051804335': 387,\n",
       " '3843631351': 388,\n",
       " '3890406080': 389,\n",
       " '3434513569': 390,\n",
       " '1630243360': 391,\n",
       " '1184535341': 392,\n",
       " '1006903887': 393,\n",
       " '1093614806': 394,\n",
       " '2129718710': 395,\n",
       " '1341096506': 396,\n",
       " '1657604679': 398,\n",
       " '1860511950': 399,\n",
       " '1854380763': 400,\n",
       " '2017624114': 401,\n",
       " '1365361942': 402,\n",
       " '146887909': 403,\n",
       " '3221466050': 404,\n",
       " '2794180407': 405,\n",
       " '450183377': 406,\n",
       " '553840202': 407,\n",
       " '321597084': 408,\n",
       " '679700922': 409,\n",
       " '1889940073': 410,\n",
       " '3890023938': 412,\n",
       " '3617812300': 413,\n",
       " '826832481': 414,\n",
       " '1190152428': 415,\n",
       " '2706390147': 416,\n",
       " '1935315082': 417,\n",
       " '2644800408': 418,\n",
       " '2906696294': 419,\n",
       " '525889178': 420,\n",
       " '1033421355': 421,\n",
       " '4172382349': 422,\n",
       " '3021023405': 423,\n",
       " '4072078077': 424,\n",
       " '4205511631': 425,\n",
       " '4264461482': 426,\n",
       " '2844691787': 427,\n",
       " '1910339245': 8759,\n",
       " '4070309332': 428,\n",
       " '3610112479': 429,\n",
       " '619666754': 431,\n",
       " '3938234624': 432,\n",
       " '2077865887': 433,\n",
       " '737340986': 435,\n",
       " '1686028847': 436,\n",
       " '2190223358': 437,\n",
       " '3855173861': 438,\n",
       " '3318022618': 439,\n",
       " '383607907': 440,\n",
       " '4280897548': 1850,\n",
       " '3988717856': 442,\n",
       " '582287969': 443,\n",
       " '3119717504': 444,\n",
       " '704244113': 445,\n",
       " '2996288314': 446,\n",
       " '3913698961': 447,\n",
       " '536059045': 448,\n",
       " '1363545588': 449,\n",
       " '2588804869': 450,\n",
       " '2409474647': 451,\n",
       " '553125617': 452,\n",
       " '1764172149': 453,\n",
       " '2115776454': 454,\n",
       " '1507489572': 455,\n",
       " '3596095472': 5740,\n",
       " '4027073137': 456,\n",
       " '4192978272': 457,\n",
       " '4037816474': 458,\n",
       " '4120806416': 459,\n",
       " '2706955572': 460,\n",
       " '51076977': 461,\n",
       " '512403156': 462,\n",
       " '3865518711': 463,\n",
       " '2368738722': 464,\n",
       " '3441655380': 465,\n",
       " '1819998447': 466,\n",
       " '3531332626': 467,\n",
       " '3980181155': 468,\n",
       " '643979828': 469,\n",
       " '1640004842': 470,\n",
       " '3289739345': 471,\n",
       " '1948173913': 472,\n",
       " '3434354867': 473,\n",
       " '2384254802': 474,\n",
       " '3402377961': 475,\n",
       " '1647257656': 476,\n",
       " '3183605169': 477,\n",
       " '4156705844': 478,\n",
       " '2928192183': 479,\n",
       " '1077825961': 480,\n",
       " '1187117667': 481,\n",
       " '1527128646': 482,\n",
       " '3501509607': 483,\n",
       " '99226238': 484,\n",
       " '1403176651': 485,\n",
       " '2572117226': 486,\n",
       " '3021122125': 3073,\n",
       " '1146130134': 488,\n",
       " '2263694418': 489,\n",
       " '3187969599': 490,\n",
       " '410842839': 491,\n",
       " '2828360569': 492,\n",
       " '1807883016': 493,\n",
       " '1179474743': 494,\n",
       " '1099380892': 495,\n",
       " '2971102319': 496,\n",
       " '2944816123': 497,\n",
       " '3268726556': 498,\n",
       " '457253413': 499,\n",
       " '3534827249': 500,\n",
       " '2834364568': 501,\n",
       " '3788504448': 502,\n",
       " '1565011423': 503,\n",
       " '3190717208': 504,\n",
       " '4202744793': 505,\n",
       " '3500532250': 506,\n",
       " '2568221006': 507,\n",
       " '2418119146': 5537,\n",
       " '2180353936': 509,\n",
       " '1315709702': 510,\n",
       " '2559164171': 511,\n",
       " '3219983221': 512,\n",
       " '3286516126': 513,\n",
       " '866903362': 514,\n",
       " '2370228216': 515,\n",
       " '353850493': 516,\n",
       " '3823594890': 517,\n",
       " '2798408743': 518,\n",
       " '2450647909': 4307,\n",
       " '326944617': 520,\n",
       " '1237174145': 521,\n",
       " '4000643494': 522,\n",
       " '1245942866': 523,\n",
       " '2671865684': 524,\n",
       " '1955298811': 525,\n",
       " '3846957590': 11535,\n",
       " '2274599788': 527,\n",
       " '974024618': 528,\n",
       " '914240358': 530,\n",
       " '2178315774': 531,\n",
       " '1819771813': 532,\n",
       " '785309813': 533,\n",
       " '3492796375': 534,\n",
       " '519767743': 535,\n",
       " '3075707957': 536,\n",
       " '769827874': 537,\n",
       " '2036167774': 538,\n",
       " '4254934247': 539,\n",
       " '58444357': 540,\n",
       " '1005830738': 541,\n",
       " '2608593001': 542,\n",
       " '1387775604': 543,\n",
       " '1977475739': 544,\n",
       " '2444024386': 545,\n",
       " '85675684': 546,\n",
       " '3466325304': 547,\n",
       " '3275948662': 548,\n",
       " '2301324542': 549,\n",
       " '3946619910': 550,\n",
       " '787562214': 551,\n",
       " '1329952040': 552,\n",
       " '3067222491': 3391,\n",
       " '3089507532': 553,\n",
       " '67648066': 554,\n",
       " '3211869804': 555,\n",
       " '1296215153': 556,\n",
       " '2704376528': 557,\n",
       " '643535605': 558,\n",
       " '3701320707': 559,\n",
       " '2209898896': 814,\n",
       " '3129396339': 560,\n",
       " '1711311187': 3440,\n",
       " '3931717290': 562,\n",
       " '896485186': 563,\n",
       " '1824198418': 564,\n",
       " '1829771929': 565,\n",
       " '526347239': 567,\n",
       " '1407801833': 568,\n",
       " '2084337326': 569,\n",
       " '3391397881': 570,\n",
       " '882150572': 3379,\n",
       " '1551306530': 571,\n",
       " '4276521055': 572,\n",
       " '1439724705': 573,\n",
       " '1700908753': 574,\n",
       " '1783630773': 575,\n",
       " '1605832718': 576,\n",
       " '2071994542': 577,\n",
       " '468922992': 578,\n",
       " '2614611034': 579,\n",
       " '1068118073': 3624,\n",
       " '2883057132': 581,\n",
       " '2422202663': 582,\n",
       " '1209548026': 583,\n",
       " '1955143671': 584,\n",
       " '1872585098': 585,\n",
       " '1793588465': 586,\n",
       " '3636792221': 587,\n",
       " '2020109352': 588,\n",
       " '424156215': 5673,\n",
       " '4086500156': 589,\n",
       " '4112668226': 590,\n",
       " '374048503': 591,\n",
       " '830183321': 592,\n",
       " '2808705599': 594,\n",
       " '3722242006': 595,\n",
       " '1044785443': 597,\n",
       " '1261557576': 598,\n",
       " '328882352': 599,\n",
       " '1281335602': 601,\n",
       " '1064284965': 602,\n",
       " '3022634670': 603,\n",
       " '1953860425': 604,\n",
       " '927693315': 605,\n",
       " '2533071551': 606,\n",
       " '2505738116': 607,\n",
       " '3602218872': 608,\n",
       " '693206007': 609,\n",
       " '663617415': 610,\n",
       " '2539026679': 611,\n",
       " '3413478237': 612,\n",
       " '3926217735': 613,\n",
       " '2505330132': 614,\n",
       " '832277631': 615,\n",
       " '3622325352': 616,\n",
       " '3200581311': 617,\n",
       " '2053603846': 618,\n",
       " '2704232594': 619,\n",
       " '1582270949': 620,\n",
       " '4125204455': 621,\n",
       " '3981836848': 622,\n",
       " '3855347794': 623,\n",
       " '939509575': 624,\n",
       " '1801869689': 6437,\n",
       " '3754239356': 625,\n",
       " '1545607295': 626,\n",
       " '1163805498': 627,\n",
       " '2650490227': 628,\n",
       " '3017293541': 629,\n",
       " '2011450028': 630,\n",
       " '3231944809': 631,\n",
       " '3935076467': 632,\n",
       " '432884667': 633,\n",
       " '3341857073': 634,\n",
       " '3113660150': 635,\n",
       " '852324847': 637,\n",
       " '4071218196': 3976,\n",
       " '1351006732': 639,\n",
       " '3438993032': 8210,\n",
       " '1381165425': 640,\n",
       " '1274017391': 641,\n",
       " '3425996117': 642,\n",
       " '1986416649': 643,\n",
       " '1153175316': 644,\n",
       " '787628032': 645,\n",
       " '2028042509': 646,\n",
       " '3404987729': 647,\n",
       " '3067996970': 648,\n",
       " '3663891590': 10721,\n",
       " '263011441': 649,\n",
       " '1515079649': 566,\n",
       " '1883524093': 651,\n",
       " '3034841234': 652,\n",
       " '1154290723': 1460,\n",
       " '474449136': 653,\n",
       " '289899674': 654,\n",
       " '3225161223': 655,\n",
       " '788806703': 656,\n",
       " '947107369': 657,\n",
       " '1582951351': 658,\n",
       " '4265093803': 659,\n",
       " '1817434960': 660,\n",
       " '832111597': 661,\n",
       " '2219392559': 662,\n",
       " '2384973677': 663,\n",
       " '2118368309': 664,\n",
       " '2322818360': 1462,\n",
       " '3809205963': 665,\n",
       " '3430641796': 666,\n",
       " '3126297081': 667,\n",
       " '1844176380': 668,\n",
       " '4110062919': 669,\n",
       " '2993747793': 670,\n",
       " '878068450': 671,\n",
       " '2424637456': 672,\n",
       " '956438727': 673,\n",
       " '590488063': 674,\n",
       " '3621854275': 675,\n",
       " '268174971': 676,\n",
       " '4220792229': 677,\n",
       " '293751558': 678,\n",
       " '1801766962': 2324,\n",
       " '3319506135': 680,\n",
       " '322709533': 681,\n",
       " '2368911433': 682,\n",
       " '964120039': 683,\n",
       " '4099353109': 684,\n",
       " '1742102536': 685,\n",
       " '1359556127': 686,\n",
       " '1271755958': 687,\n",
       " '1018819627': 688,\n",
       " '3580442023': 689,\n",
       " '4232519602': 690,\n",
       " '1032588622': 691,\n",
       " '1342847040': 692,\n",
       " '2610561518': 693,\n",
       " '4002881841': 694,\n",
       " '1023712594': 695,\n",
       " '2509552492': 696,\n",
       " '3629737683': 697,\n",
       " '561449801': 698,\n",
       " '945869322': 699,\n",
       " '2179267582': 700,\n",
       " '1359749784': 701,\n",
       " '2766617713': 702,\n",
       " '2859654056': 703,\n",
       " '845902488': 704,\n",
       " '760916752': 705,\n",
       " '705007381': 706,\n",
       " '4080537914': 4396,\n",
       " '1764411835': 708,\n",
       " '1945597998': 709,\n",
       " '4104785185': 710,\n",
       " '170511774': 2903,\n",
       " '1143203761': 712,\n",
       " '3906291725': 713,\n",
       " '2748187097': 13074,\n",
       " '3171268449': 715,\n",
       " '176859368': 716,\n",
       " '2839635887': 717,\n",
       " '2391976137': 719,\n",
       " '2424074793': 720,\n",
       " '1023138400': 721,\n",
       " '426166781': 722,\n",
       " '249760834': 723,\n",
       " '3222871469': 724,\n",
       " '3834889020': 725,\n",
       " '3482282242': 726,\n",
       " '1255629030': 727,\n",
       " '3501001868': 728,\n",
       " '2802110996': 729,\n",
       " '1807182727': 730,\n",
       " '1340479010': 731,\n",
       " '172405986': 732,\n",
       " '3548928882': 733,\n",
       " '1062024228': 734,\n",
       " '3950787980': 735,\n",
       " '3635358150': 736,\n",
       " '1389265162': 737,\n",
       " '547383999': 738,\n",
       " '2895849260': 4558,\n",
       " '3656008998': 740,\n",
       " '3842924038': 741,\n",
       " '3658165839': 742,\n",
       " '4188214514': 743,\n",
       " '3086836700': 5413,\n",
       " '3008408180': 746,\n",
       " '1572089933': 747,\n",
       " '393162138': 748,\n",
       " '4051353547': 749,\n",
       " '1658768128': 750,\n",
       " '1506378274': 752,\n",
       " '743289246': 753,\n",
       " '1596190412': 754,\n",
       " '471488113': 755,\n",
       " '1327533901': 756,\n",
       " '3503174392': 4569,\n",
       " '2806794871': 758,\n",
       " '2893435883': 759,\n",
       " '3837372536': 11143,\n",
       " '35091389': 761,\n",
       " '2184211588': 762,\n",
       " '610221734': 764,\n",
       " '3284750825': 765,\n",
       " '3326401128': 766,\n",
       " '885971279': 767,\n",
       " '2692070381': 768,\n",
       " '3590807274': 4705,\n",
       " '3479738412': 770,\n",
       " '899213418': 771,\n",
       " '663120398': 772,\n",
       " '1764159783': 773,\n",
       " '1620415785': 774,\n",
       " '3579174462': 775,\n",
       " '1079899197': 776,\n",
       " '2554360190': 777,\n",
       " '3700378878': 779,\n",
       " '2567308538': 780,\n",
       " '1012343640': 781,\n",
       " '705138890': 782,\n",
       " '1142967652': 783,\n",
       " '2943640347': 784,\n",
       " '363024307': 1163,\n",
       " '3803531290': 786,\n",
       " '1722968298': 4576,\n",
       " '3436633625': 788,\n",
       " '679023125': 789,\n",
       " '2554361988': 790,\n",
       " '3250536690': 791,\n",
       " '2062625787': 792,\n",
       " '918116457': 793,\n",
       " '937839032': 4873,\n",
       " '816912378': 795,\n",
       " '2460957668': 796,\n",
       " '3632072502': 797,\n",
       " '2153348068': 798,\n",
       " '3304261393': 799,\n",
       " '3847927617': 800,\n",
       " '3671450770': 803,\n",
       " '3123256869': 804,\n",
       " '940376988': 805,\n",
       " '1430458652': 806,\n",
       " '667664881': 807,\n",
       " '2925109008': 808,\n",
       " '688975255': 809,\n",
       " '2963144429': 810,\n",
       " '1036961928': 811,\n",
       " '1395200630': 812,\n",
       " '951253136': 813,\n",
       " '915403718': 11344,\n",
       " '3926229879': 815,\n",
       " '1122283794': 816,\n",
       " '3628957138': 817,\n",
       " '1850184598': 818,\n",
       " '3161434996': 819,\n",
       " '1711893150': 820,\n",
       " '3547653054': 821,\n",
       " '1274527630': 822,\n",
       " '61104529': 823,\n",
       " '4262164602': 8469,\n",
       " '2840642149': 824,\n",
       " '3155836085': 825,\n",
       " '2063976566': 826,\n",
       " '3330051251': 828,\n",
       " '872878580': 829,\n",
       " '312581825': 830,\n",
       " '137465175': 138,\n",
       " '1120182267': 833,\n",
       " '3863651111': 834,\n",
       " '2420516619': 8439,\n",
       " '1486919469': 835,\n",
       " '4228109405': 836,\n",
       " '1960789592': 837,\n",
       " '63726188': 840,\n",
       " '2688159351': 841,\n",
       " '3369636425': 842,\n",
       " '87840033': 844,\n",
       " '3658522039': 9852,\n",
       " '1730501944': 845,\n",
       " '4235863537': 846,\n",
       " '2454980175': 847,\n",
       " '300126317': 848,\n",
       " '578037803': 849,\n",
       " '1401314683': 5281,\n",
       " '646715160': 851,\n",
       " '4266815563': 852,\n",
       " '4256067371': 853,\n",
       " '721082024': 854,\n",
       " '1294433853': 855,\n",
       " '2254075965': 856,\n",
       " '256505275': 857,\n",
       " '1170318717': 858,\n",
       " '3282413042': 10079,\n",
       " '468069570': 859,\n",
       " '3191533412': 860,\n",
       " '3321517547': 861,\n",
       " '626503343': 863,\n",
       " '1400053177': 864,\n",
       " '1170338316': 865,\n",
       " '1385126312': 866,\n",
       " '3429958607': 868,\n",
       " '2137154383': 869,\n",
       " '69856756': 5415,\n",
       " '1454534917': 871,\n",
       " '744858144': 872,\n",
       " '1757362667': 873,\n",
       " '2976928477': 874,\n",
       " '3453997003': 875,\n",
       " '2007279414': 876,\n",
       " '473090869': 877,\n",
       " '1020530176': 878,\n",
       " '3174177804': 879,\n",
       " '818142116': 5482,\n",
       " '181332418': 881,\n",
       " '536196786': 882,\n",
       " '493625429': 883,\n",
       " '3054051090': 884,\n",
       " '2269626530': 885,\n",
       " '2159444507': 886,\n",
       " '974085606': 887,\n",
       " '1396446223': 889,\n",
       " '1310931838': 890,\n",
       " '1669367540': 891,\n",
       " '2758697837': 892,\n",
       " '726662409': 893,\n",
       " '3432819058': 894,\n",
       " '2279381833': 895,\n",
       " '4004100709': 896,\n",
       " '3684919275': 897,\n",
       " '1769487166': 898,\n",
       " '815715465': 899,\n",
       " '2088284484': 900,\n",
       " '1554642353': 901,\n",
       " '3681999765': 902,\n",
       " '1812117472': 903,\n",
       " '64442250': 904,\n",
       " '692075109': 905,\n",
       " '3080242563': 906,\n",
       " '877202695': 907,\n",
       " '4189303160': 908,\n",
       " '1214605379': 909,\n",
       " '44607617': 910,\n",
       " '1696929787': 911,\n",
       " '1506510838': 912,\n",
       " '317758728': 914,\n",
       " '2505215665': 915,\n",
       " '54535450': 916,\n",
       " '1880608957': 917,\n",
       " '4181300264': 918,\n",
       " '493246293': 8325,\n",
       " '3416395266': 919,\n",
       " '2490438703': 920,\n",
       " '2021591040': 921,\n",
       " '4215173213': 922,\n",
       " '4100237668': 923,\n",
       " '3887323073': 924,\n",
       " '1951237429': 925,\n",
       " '3129233779': 926,\n",
       " '3080864770': 927,\n",
       " '1282392038': 928,\n",
       " '2491530958': 929,\n",
       " '3765974734': 930,\n",
       " '2518430453': 931,\n",
       " '3406035843': 932,\n",
       " '604019700': 933,\n",
       " '2213514822': 934,\n",
       " '1517730411': 12147,\n",
       " '2714036762': 936,\n",
       " '3045891165': 937,\n",
       " '1287884858': 939,\n",
       " '3099686443': 940,\n",
       " '565086586': 941,\n",
       " '1386545389': 942,\n",
       " '3123645626': 943,\n",
       " '887203538': 944,\n",
       " '3864658857': 945,\n",
       " '2378521366': 4234,\n",
       " '3794166180': 947,\n",
       " '4267881656': 948,\n",
       " '3596510482': 5165,\n",
       " '2038842201': 950,\n",
       " '1720077487': 951,\n",
       " '2585162162': 952,\n",
       " '3097220122': 953,\n",
       " '1701917923': 5908,\n",
       " '2352676247': 6820,\n",
       " '3592182997': 11379,\n",
       " '1633263987': 956,\n",
       " '2253311961': 957,\n",
       " '1704192379': 958,\n",
       " '3235943764': 959,\n",
       " '681905393': 4607,\n",
       " '1256540913': 961,\n",
       " '361295277': 962,\n",
       " '682567291': 963,\n",
       " '3949409640': 964,\n",
       " '1702401336': 965,\n",
       " '3237640011': 966,\n",
       " '1602227394': 967,\n",
       " '383563223': 968,\n",
       " '252821798': 969,\n",
       " '1355891623': 970,\n",
       " '2393885244': 971,\n",
       " '943870191': 972,\n",
       " '846069276': 973,\n",
       " '3622885988': 4975,\n",
       " '2363524424': 974,\n",
       " '1044854627': 975,\n",
       " '1565715575': 976,\n",
       " '110357109': 977,\n",
       " '1380051674': 978,\n",
       " '2780690728': 979,\n",
       " '396698671': 980,\n",
       " '1396454425': 981,\n",
       " '733567972': 982,\n",
       " '1272961219': 983,\n",
       " '316055690': 984,\n",
       " '398208897': 985,\n",
       " '1118969585': 986,\n",
       " '1855529308': 987,\n",
       " '613851355': 988,\n",
       " '1516820536': 989,\n",
       " '480523741': 991,\n",
       " '939563341': 992,\n",
       " '2145223158': 993,\n",
       " '3655295959': 994,\n",
       " '2452291959': 995,\n",
       " '791187416': 996,\n",
       " '3039309993': 997,\n",
       " '2157199483': 998,\n",
       " '1000018501': 999,\n",
       " '1092379167': 1000,\n",
       " '1967012522': 1001,\n",
       " '2225155651': 1002,\n",
       " '1217440720': 1003,\n",
       " '2936461848': 1004,\n",
       " '1556853759': 1005,\n",
       " '3276095034': 1006,\n",
       " '1829766016': 1007,\n",
       " '2520855981': 1008,\n",
       " '2992424940': 1009,\n",
       " '1082833698': 1010,\n",
       " '1711030046': 1011,\n",
       " '3928351581': 1012,\n",
       " '2829792984': 1013,\n",
       " '843676515': 1014,\n",
       " '3679301219': 1015,\n",
       " '652541886': 4028,\n",
       " '1512723504': 1017,\n",
       " '3173314627': 1018,\n",
       " '1343845939': 1019,\n",
       " ...}"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cPickle.load(open(\"PE_eventIndex.pkl\", 'rb'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "#根据上面程序产生的训练与测试集的不同活动id,在活动数据(events.csv文件中)中找到这些活动\n",
    "event_ids=data.keys()\n",
    "events = pd.read_csv(\"events.csv\")\n",
    "eventdata=pd.DataFrame(columns=events.columns.values)\n",
    "for ids in event_ids:\n",
    "    eventdata=eventdata.append(events[events['event_id'].isin([ids])],ignore_index=True )\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "#将这些活动数据另存为一个新csv\n",
    "eventdata.to_csv(\"eventdata.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>event_id</th>\n",
       "      <th>user_id</th>\n",
       "      <th>start_time</th>\n",
       "      <th>city</th>\n",
       "      <th>state</th>\n",
       "      <th>zip</th>\n",
       "      <th>country</th>\n",
       "      <th>lat</th>\n",
       "      <th>lng</th>\n",
       "      <th>c_1</th>\n",
       "      <th>...</th>\n",
       "      <th>c_92</th>\n",
       "      <th>c_93</th>\n",
       "      <th>c_94</th>\n",
       "      <th>c_95</th>\n",
       "      <th>c_96</th>\n",
       "      <th>c_97</th>\n",
       "      <th>c_98</th>\n",
       "      <th>c_99</th>\n",
       "      <th>c_100</th>\n",
       "      <th>c_other</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>433929966</td>\n",
       "      <td>3142941855</td>\n",
       "      <td>2012-10-26T12:50:00.003Z</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4234683959</td>\n",
       "      <td>1721067031</td>\n",
       "      <td>2012-10-11T00:00:00.001Z</td>\n",
       "      <td>Niagara Falls</td>\n",
       "      <td>ON</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Canada</td>\n",
       "      <td>43.092</td>\n",
       "      <td>-79.077</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>39</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>679447655</td>\n",
       "      <td>1363182686</td>\n",
       "      <td>2012-10-07T00:00:00.001Z</td>\n",
       "      <td>Queens</td>\n",
       "      <td>NY</td>\n",
       "      <td>NaN</td>\n",
       "      <td>United States</td>\n",
       "      <td>40.696</td>\n",
       "      <td>-73.821</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>773396729</td>\n",
       "      <td>3220659109</td>\n",
       "      <td>2012-11-14T08:00:00.003Z</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>3822707736</td>\n",
       "      <td>3042018139</td>\n",
       "      <td>2012-11-18T21:00:00.003Z</td>\n",
       "      <td>Los Angeles</td>\n",
       "      <td>CA</td>\n",
       "      <td>90024</td>\n",
       "      <td>United States</td>\n",
       "      <td>34.072</td>\n",
       "      <td>-118.444</td>\n",
       "      <td>18</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>249</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 110 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     event_id     user_id                start_time           city state  \\\n",
       "0   433929966  3142941855  2012-10-26T12:50:00.003Z            NaN   NaN   \n",
       "1  4234683959  1721067031  2012-10-11T00:00:00.001Z  Niagara Falls    ON   \n",
       "2   679447655  1363182686  2012-10-07T00:00:00.001Z         Queens    NY   \n",
       "3   773396729  3220659109  2012-11-14T08:00:00.003Z            NaN   NaN   \n",
       "4  3822707736  3042018139  2012-11-18T21:00:00.003Z    Los Angeles    CA   \n",
       "\n",
       "     zip        country     lat      lng c_1   ...   c_92 c_93 c_94 c_95 c_96  \\\n",
       "0    NaN            NaN     NaN      NaN   0   ...      0    0    0    0    0   \n",
       "1    NaN         Canada  43.092  -79.077   1   ...      0    0    0    0    0   \n",
       "2    NaN  United States  40.696  -73.821   0   ...      0    0    0    0    0   \n",
       "3    NaN            NaN     NaN      NaN   2   ...      0    0    0    0    0   \n",
       "4  90024  United States  34.072 -118.444  18   ...      0    0    1    0    1   \n",
       "\n",
       "  c_97 c_98 c_99 c_100 c_other  \n",
       "0    0    0    0     0      27  \n",
       "1    1    0    0     0      39  \n",
       "2    0    0    0     0       2  \n",
       "3    0    0    0     0       3  \n",
       "4    2    0    0     1     249  \n",
       "\n",
       "[5 rows x 110 columns]"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "eventdata.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 13418 entries, 0 to 13417\n",
      "Columns: 110 entries, event_id to c_other\n",
      "dtypes: float64(2), object(108)\n",
      "memory usage: 11.3+ MB\n"
     ]
    }
   ],
   "source": [
    "eventdata.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
